rm(list = ls())
setwd("")

library(tidyverse)
library(haven)
library(readxl)

df_rep <- read_stata("./Data/Chiang_Kuo_JJPS_ReplicationData.dta")

#===========================================
#                Table 1  
#===========================================

df_rep <- df_rep %>%
  mutate(education = factor(education - 1),
         male = factor(male + 1), # let male = 2
         age_binned = case_when(
           age >= 20 & age < 30 ~ 1,
           age >= 30 & age < 40 ~ 2,
           age >= 40 & age < 50 ~ 3,
           age >= 50 & age <= 55 ~ 4
         ) %>% as.factor(),
         region = case_when(
           county %in% c(1, 2, 3, 4, 5, 6, 17) ~ 1,
           county %in% c(7, 8, 9, 10, 11) ~ 2,
           county %in% c(12, 13, 14, 15, 16) ~ 3,
           county %in% c(18, 19) ~ 4
         ))


# Gender population (%)
gender_pop <- read_excel("./Data/TW_GENDER_POP_CALC.xlsx")
male <- gender_pop %>% pull(Percent)
names(male) <- c(1, 2) %>% as.factor()

# Age population (%)
age_pop <- read_excel("./Data/TW_AGE_CALC.xlsx")
age_pop <- age_pop %>% 
  mutate(age_binned = case_when(
    Age < 20 ~ 0,
    Age >= 20 & Age < 30 ~ 1,
    Age >= 30 & Age < 40 ~ 2,
    Age >= 40 & Age < 50 ~ 3,
    Age >= 50 & Age <= 55 ~ 4,
    Age > 55 ~ 5
    )) %>%
  group_by(age_binned) %>%
  dplyr::summarize(group_sum = sum(N)) %>%
  ungroup() %>%
  filter(!age_binned %in% c(0, 5)) %>%
  mutate(sample_sum = sum(group_sum),
         group_perc = group_sum / sample_sum)

age_binned <- age_pop %>% pull(group_perc)
names(age_binned) <- age_pop %>% pull(age_binned) %>% as.factor()

# Region population (%)
region_pop <- read_excel("./Data/TW_COUNTY_CALC.xlsx")
region_pop <- region_pop %>% 
  mutate(region_name = case_when(
    Code %in% c(1, 2, 3, 4, 5, 6, 17) ~ 1,
    Code %in% c(7, 8, 9, 10, 11) ~ 2,
    Code %in% c(12, 13, 14, 15, 16) ~ 3,
    Code %in% c(18, 19) ~ 4
  )) %>% 
  group_by(region_name) %>%
  dplyr::summarize(group_sum = sum(N)) %>%  
  mutate(region_perc = group_sum / sum(group_sum))

region <- region_pop$region_perc
names(region) <- region_pop %>% pull(region_name) %>% as.factor()


# Age
age_compare <- capture.output({
  cat("===== Difference on Age =======\n")
  cat("Note: 1 = 20~29; 2 = 30~39; 3 = 40~49; 4 = 50-55\n\n")
  
  cat("Sample:\n")
  age_table <- round(prop.table(table(df_rep$age_binned)) * 100, 2)
  print(age_table)
  
  cat("\n")
  
  cat("Population:\n")
  age_pop_data <- round(age_binned * 100, 2)
  print(age_pop_data)
  
  print(chisq.test(table(df_rep$age_binned), p = age_binned))
})

# Gender
gender_compare <- capture.output({
  cat("===== Difference on Gender =======\n")
  cat("Note: 1 = Female; 2 = Male\n\n")
  
  cat("Sample:\n")
  sex_table <- round(prop.table(table(df_rep$male)) * 100, 2)
  print(sex_table)
  
  cat("\n")
  
  cat("Population:\n")
  sex_pop_data <- round(male * 100, 2)
  print(sex_pop_data)
  
  print(chisq.test(table(df_rep$male), p = male))
})

# Region
region_compare <- capture.output({
  cat("===== Difference on Region =======\n")
  cat("Note: 1 = North; 2 = Central; 3 = South; 4 = East\n\n")
  
  cat("Sample:\n")
  region_table <- round(prop.table(table(df_rep$region)) * 100, 2)
  print(region_table)
  
  cat("\n")
  
  cat("Population:\n")
  region_pop_data <- round(region * 100, 2)
  print(region_pop_data)
  
  print(chisq.test(table(df_rep$region), p = region))
})

all_comparisons <- c(age_compare, "\n", gender_compare, "\n", region_compare)
writeLines(all_comparisons, "./Out/Table_1.txt")


#===========================================
#               Figure C1
#===========================================

figure_c1 <- ggplot(data = df_rep, aes(x = as.factor(gov_trust))) +
  geom_bar(fill = "gray", alpha = 0.95) +
  geom_text(stat = "count", 
            aes(label = paste0(..count.., " (", scales::percent(..count../sum(..count..)), ")")),
            vjust = -0.3, hjust = 0.5) +
  labs(x = "Gov Trust", y = "Count", title = "Distribution of Gov Trust") +
  theme_bw() +
  theme(text = element_text(size = 12))  
ggsave(figure_c1, filename = "./Out/Figure_C1.png", width = 10, height = 6)




